# A lexer for the Calcula language written in Ruby.
#
# @author Paul T.
class Calcula::Lexer

  # An error that is raised because of lexing failures.
  #
  # @author Paul T.
  class LexerError < StandardError
  end

  # Character ranges used to classify single characters.
  ZERO_TO_NINE = ("0".."9").freeze
  LOWER_A_TO_Z = ("a".."z").freeze
  UPPER_A_TO_Z = ("A".."Z").freeze

  # Characters that always form a token by themselves, mapped to the token type.
  SINGLE_CHAR_TOKEN = {
    "(" => :PAREN_O, ")" => :PAREN_C,
    "[" => :SQUARE_O, "]" => :SQUARE_C,
    "+" => :OP_ADD, "-" => :OP_SUB,
    "%" => :OP_REM, "$" => :ROUND_DOLLAR,
    "@" => :COMPOSE, ";" => :DISP,
    "," => :COMMA, "\\" => :LAMBDA,
    "=" => :OP_EQ,

    " " => :WS, "\t" => :WS, "\r" => :WS,
    "\f" => :WS, "\n" => :WS
  }.freeze

  # Identifier-shaped words that are emitted as their own token type
  # (the upper-cased word as a symbol) instead of :ID.
  KEYWORDS = ["let", "and", "or", "not"].freeze

  # Constructs a new Lexer instance
  #
  # @param src [String] The source code
  def initialize(src)
    @src = src
    reset
  end

  # Raises a error with a seemingly useful message. The message includes the
  # current (1-based) line and column of the lexer.
  #
  # @raise [LexerError] Calling this method always raises this error
  # @param msg [String, nil] The additional message or the reason of the error.
  #   A nil or empty message is reported as "<no message>".
  def lexError(msg)
    reason = msg.to_s.empty? ? "<no message>" : msg
    raise LexerError, "Failed to lex past (L#{@lineNum + 1}:#{@linePos + 1}): #{reason}"
  end

  # Advances the line number and the column position based on the character found
  #
  # @param c [String (Char)] The character used to determine the advancing rule
  def advancePos(c)
    @i += 1
    # Must be a double-quoted literal: '\n' in single quotes is a backslash
    # followed by the letter n and would never match a real line break.
    if c == "\n"
      @lineNum += 1
      @linePos = 0
    else
      @linePos += 1
    end
  end

  # Consumes characters until the block yields false. The first character is
  # consumed no matter what (neither the block nor the hooks see it).
  #
  # @param pre_append [Optional, lambda] Executed with the current character before appending. If returns false, the process terminates and the consumed chars are returned
  # @param post_append [Optional, lambda] Executed after appending, with the character that follows the appended one (nil at the end of the source). If returns false, the process terminates and the consumed chars are returned
  # @yield [c] Predicate for whether or not character consumption should continue
  # @yieldparam c [String (Char)] The current character
  # @yieldreturn [true, false] true if consumption should continue, false otherwise
  # @return [String] The characters being consumed
  def consumeWhen(pre_append: ->(_c) { true }, post_append: ->(_c) { true })
    buf = @src[@i]
    advancePos(buf)
    while @i < @src.length && yield(@src[@i])
      break unless pre_append.call(@src[@i])
      # String#[] returned a fresh string for buf, so appending in place
      # does not touch @src.
      buf << @src[@i]
      advancePos(@src[@i])
      break unless post_append.call(@src[@i])
    end
    buf
  end

  # Starts the lexing routine which converts the source code into a list of tokens.
  # Lexing continues from the current position; use {#reset} to start over.
  #
  # @raise [LexerError] If a character cannot start any token or a number is malformed
  # @return [Array<Calcula::Token>] The tokens based on the source code
  def lex
    rst = []
    rst << nextToken while @i < @src.length
    rst
  end

  # Resets the lexing position
  def reset
    @i = 0
    @lineNum = 0
    @linePos = 0
  end

  # Checks to see if the character is part of a valid identifier
  #
  # @param c [String (Char)] The character
  # @return [true, false] If the character is valid
  def isIdentChar?(c)
    LOWER_A_TO_Z === c || UPPER_A_TO_Z === c || "'" == c
  end

  # Checks to see if the character is numeric (including the decimal point)
  #
  # @param c [String (Char)] The character
  # @return [true, false] If the character is valid
  def isNumericChar?(c)
    ZERO_TO_NINE === c || "." == c
  end

  # Checks if the two characters in sequence are the same and form a token by
  # itself. Short hand for `continuousConsume(idSingle, {current_char => idDouble})`.
  #
  # @param idSingle [Symbol] If the first character itself is an individual token
  # @param idDouble [Symbol] If both characters are interpreted as a token
  # @return [Calcula::Token] The token with either `idSingle` or `idDouble` as type
  def potentialDuplicateToken(idSingle, idDouble)
    continuousConsume(idSingle, {@src[@i] => idDouble})
  end

  # Checks if the consecutive character forms another token. The current
  # character is always consumed; the following one is consumed only when it
  # is a key of `continuousTok`.
  #
  # @param idSingle [Symbol] The token type for the first character alone
  # @param continuousTok [Hash{String (Char) => Symbol}] The consecutive character
  # @return [Calcula::Token] The token found
  def continuousConsume(idSingle, continuousTok)
    buf = @src[@i]
    advancePos(buf)
    follow = @src[@i] # nil when the first character was the last one in the source
    idDouble = continuousTok[follow]
    return makeToken(idSingle, buf) if idDouble.nil?

    buf << follow
    advancePos(follow)
    makeToken(idDouble, buf)
  end

  private

  # Lexes exactly one token starting at the current position.
  #
  # @raise [LexerError] If the current character cannot start any token
  # @return [Calcula::Token] The token found
  def nextToken
    case (chr = @src[@i])
    when "*" then potentialDuplicateToken(:OP_MUL, :OP_POW)
    when "/" then potentialDuplicateToken(:OP_DIV, :RAT)
    when "#" then makeToken(:COMMENT, consumeWhen { |c| c != "\n" })
    when "!" then continuousConsume(:ASSERT, {"=" => :OP_NE})
    when "<" then continuousConsume(:OP_LT, {"=" => :OP_LE})
    when ">" then continuousConsume(:OP_GT, {"=" => :OP_GE})
    when LOWER_A_TO_Z, UPPER_A_TO_Z then lexWord
    when ZERO_TO_NINE then lexNumber
    else lexSingleChar(chr)
    end
  end

  # Lexes a run of identifier characters into a keyword token or an :ID token.
  #
  # @return [Calcula::Token] The keyword or identifier token
  def lexWord
    txtSeq = consumeWhen { |c| isIdentChar? c }
    type = KEYWORDS.include?(txtSeq) ? txtSeq.upcase.to_sym : :ID
    makeToken(type, txtSeq)
  end

  # Lexes a run of digits with at most one decimal point into a :NUM token.
  #
  # @raise [LexerError] If a second decimal point is encountered
  # @return [Calcula::Token] The number token
  def lexNumber
    seenPoint = false
    guardPoint = lambda do |c|
      if c == "."
        lexError("Numbers with two or more decimal points are illegal") if seenPoint
        seenPoint = true
      end
      true
    end
    makeToken(:NUM, consumeWhen(pre_append: guardPoint) { |c| isNumericChar? c })
  end

  # Lexes a character listed in SINGLE_CHAR_TOKEN.
  #
  # @param chr [String (Char)] The current character
  # @raise [LexerError] If the character is not a known single character token
  # @return [Calcula::Token] The token found
  def lexSingleChar(chr)
    type = SINGLE_CHAR_TOKEN[chr]
    lexError("Unrecognized token starting with '#{chr}'") if type.nil?
    advancePos(chr)
    makeToken(type, chr)
  end

  # Builds a token stamped with the lexer's current 1-based line and column.
  #
  # NOTE(review): every caller consumes the token text before calling this, so
  # the recorded position is the one just past the token, not its start. This
  # matches the previous behaviour; confirm with the token consumers whether
  # the start position is what they actually want.
  #
  # @param type [Symbol] The token type
  # @param text [String] The text the token was made from
  # @return [Calcula::Token] The new token
  def makeToken(type, text)
    Calcula::Token.new(type, text, lineNum: @lineNum + 1, linePos: @linePos + 1)
  end
end
